
load('joining/fec_recipients.rda') ## from FEC summary files

# Write cmte IDs ----------------------------------------------------------

# 2012 cycle: committees taken in full, one row per committee.
# start_date is the earliest contribution date kept for that committee (it is
# compared against contb_receipt_dt once the FEC data is joined); a value of
# '1900-01-01' therefore acts as "no lower bound".
cmtes12_full <- tribble(
  ~cmte_id,    ~type,   ~party, ~start_date,
  "C00431445", "pcc",   "d",    "2011-01-01", # 2010-cycle rows are carried too (see other_full)
  "C00431171", "pcc",   "r",    "1900-01-01",
  "C00010603", "party", "d",    "2011-01-01",
  "C00003418", "party", "r",    "2012-05-29",
  "C00494740", "jfc",   "d",    "1900-01-01",
  "C00518282", "jfc",   "r",    "1900-01-01",
  "C00495861", "sp",    "d",    "2011-01-01",
  "C00490045", "sp",    "r",    "1900-01-01"
)

# 2016 cycle: committees taken in full, one row per committee.
# start_date is the earliest contribution date kept for that committee;
# '1900-01-01' acts as "no lower bound".
cmtes16_full <- tribble(
  ~cmte_id,    ~type,   ~party, ~start_date,
  "C00575795", "pcc",   "d",    "1900-01-01",
  "C00580100", "pcc",   "r",    "1900-01-01",
  "C00010603", "party", "d",    "2016-06-06",
  "C00003418", "party", "r",    "2016-05-17",
  "C00586537", "jfc",   "d",    "1900-01-01",
  "C00618389", "jfc",   "r",    "1900-01-01",
  "C00618371", "jfc",   "r",    "1900-01-01",
  "C00495861", "sp",    "d",    "2015-01-01",
  "C00637512", "sp",    "r",    "1900-01-01",
  "C00756882", "sp",    "r",    "1900-01-01",
  "C00574533", "sp",    "r",    "1900-01-01",
  "C00618876", "sp",    "r",    "1900-01-01",
  "C00608489", "sp",    "r",    "1900-01-01"
)

# 2020 cycle: committees taken in full, one row per committee.
# start_date is the earliest contribution date kept for that committee;
# '1900-01-01' acts as "no lower bound".
# Rows marked "2018 too" also have their 2018-cycle records carried into the
# 2020 totals; the matching 2018 rows are listed in other_full.
cmtes20_full <- tribble(
  ~cmte_id,    ~type,   ~party, ~start_date,
  "C00703975", "pcc",   "d",    "1900-01-01",
  "C00580100", "pcc",   "r",    "2017-01-01", # 2018 too
  "C00010603", "party", "d",    "2020-04-24",
  "C00003418", "party", "r",    "2019-01-01",
  "C00744946", "jfc",   "d",    "1900-01-01",
  "C00618389", "jfc",   "r",    "2017-01-01", # 2018 too
  "C00618371", "jfc",   "r",    "2017-01-01", # 2018 too
  "C00495861", "sp",    "d",    "2020-04-24",
  "C00701888", "sp",    "d",    "1900-01-01",
  "C00669259", "sp",    "d",    "2019-01-01",
  "C00532705", "sp",    "d",    "2019-01-01",
  "C00492140", "sp",    "d",    "2019-01-01",
  "C00637512", "sp",    "r",    "2017-01-01", # 2018 too
  "C00756882", "sp",    "r",    "2017-01-01", # 2018 too
  "C00574533", "sp",    "r",    "2017-01-01", # 2018 too
  "C00618876", "sp",    "r",    "2017-01-01", # 2018 too
  "C00608489", "sp",    "r",    "2017-01-01"  # 2018 too
)

# Shared finishing step for the three per-cycle committee tables.
#   tbl        - one of the cmtesNN_full tibbles (cmte_id, type, party, start_date)
#   cycle_year - integer cycle, stored as both cycle_table and target_cycle
# Adds line_num ('17A' for pcc rows, '11AI' for every other type) and converts
# the start_date strings to Date so they can be compared with receipt dates.
finish_cycle_table <- function(tbl, cycle_year) {
  tbl %>%
    mutate(
      line_num = if_else(type == "pcc", "17A", "11AI"),
      cycle_table = cycle_year,
      target_cycle = cycle_year,
      start_date = as.Date(start_date)
    )
}

cmtes12_full <- finish_cycle_table(cmtes12_full, 2012L)
cmtes16_full <- finish_cycle_table(cmtes16_full, 2016L)
cmtes20_full <- finish_cycle_table(cmtes20_full, 2020L)

# Carry-over rows: records stored under one cycle table (cycle_table) that are
# credited to a later cycle (target_cycle), e.g. 2018 records counted towards
# the 2020 totals.
other_full <- tribble(
  ~cmte_id,    ~type, ~party, ~start_date,  ~cycle_table, ~target_cycle,
  "C00431445", "pcc", "d",    "2009-01-01", 2010L,        2012L,
  "C00580100", "pcc", "r",    "2017-01-01", 2018L,        2020L,
  "C00618389", "jfc", "r",    "2017-01-01", 2018L,        2020L,
  "C00618371", "jfc", "r",    "2017-01-01", 2018L,        2020L,
  "C00637512", "sp",  "r",    "2017-01-01", 2018L,        2020L,
  "C00756882", "sp",  "r",    "2017-01-01", 2018L,        2020L,
  "C00574533", "sp",  "r",    "2017-01-01", 2018L,        2020L,
  "C00618876", "sp",  "r",    "2017-01-01", 2018L,        2020L,
  "C00608489", "sp",  "r",    "2017-01-01", 2018L,        2020L
) %>%
  mutate(
    # same line-number rule as the per-cycle tables
    line_num = if_else(type == "pcc", "17A", "11AI"),
    start_date = as.Date(start_date)
  )

# Other presidential candidates' PCCs (all Republican), tracked separately
# from the nominees' committees as type 'other_pcc'.
#
# 2012 source:
# https://www.fec.gov/resources/campaign-finance-statistics/2012/tables/presidential/Pres1_2012_18m.pdf
#   PCCs of Cain, Gingrich, Ron Paul, Rick Perry, Rick Santorum
# 2016 source:
# https://www.fec.gov/resources/campaign-finance-statistics/2016/tables/presidential/PresCand1_2016_24m.pdf
#   PCCs of Jeb Bush, Carson, Cruz, Fiorina, Kasich, Rand Paul, Rubio
#   (those that raised at least 10M from individuals)

other_pres <- tribble(
  ~cmte_id,    ~type,       ~party, ~cycle_table,
  "C00496067", "other_pcc", "r",    2012L,
  "C00496497", "other_pcc", "r",    2012L,
  "C00495820", "other_pcc", "r",    2012L,
  "C00500587", "other_pcc", "r",    2012L,
  "C00496034", "other_pcc", "r",    2012L,
  "C00579458", "other_pcc", "r",    2016L,
  "C00573519", "other_pcc", "r",    2016L,
  "C00574624", "other_pcc", "r",    2016L,
  "C00577312", "other_pcc", "r",    2016L,
  "C00581876", "other_pcc", "r",    2016L,
  "C00575449", "other_pcc", "r",    2016L,
  "C00458844", "other_pcc", "r",    2016L
) %>%
  mutate(
    target_cycle = cycle_table,
    line_num = "17A",
    # count from 1 January of the year before the cycle year
    start_date = as.Date(str_c(cycle_table - 1, "-01-01"))
  )

# Additional party committees (type 'other_party'). Each committee is repeated
# for every cycle of interest, with target_cycle equal to cycle_table.
other_party <- tribble(
  ~cmte_id,    ~type,         ~party,
  "C00042366", "other_party", "d",
  "C00000935", "other_party", "d",
  "C00484642", "other_party", "d",
  "C00495028", "other_party", "d",
  "C00027466", "other_party", "r",
  "C00075820", "other_party", "r",
  "C00571703", "other_party", "r",
  "C00504530", "other_party", "r"
) %>%
  # cross every committee with the three cycles
  expand_grid(
    tibble(
      cycle_table = c(2012L, 2016L, 2020L),
      target_cycle = cycle_table
    )
  ) %>%
  mutate(
    line_num = "11AI",
    # count from 1 January of the year before the cycle year
    start_date = as.Date(str_c(cycle_table - 1, "-01-01"))
  )

# House and Senate committees taken from `recipients` (FEC summary files),
# limited to Democratic (DEM/DFL) and Republican (REP) candidates in the
# 2012, 2016 and 2020 cycles.
congressional <- recipients %>%
  filter(cand_office %in% c('H', 'S')) %>%
  transmute(
    cycle_table = as.integer(cycle),
    cmte_id,
    party = case_when(
      cand_pty_affiliation %in% c('DEM', 'DFL') ~ 'd',
      cand_pty_affiliation == 'REP' ~ 'r',
      TRUE ~ NA_character_ # any other affiliation is dropped below
    )
  ) %>%
  filter(!is.na(party), cycle_table %in% c(2012, 2016, 2020)) %>%
  # The later join on (cmte_id, line_num, cycle_table) assumes one row per
  # committee and cycle; a repeated row here would double-count every matching
  # contribution, so collapse exact duplicates before adding the constants.
  distinct() %>%
  mutate(
    type = 'congress',
    # count from 1 January of the year before the cycle year
    start_date = as.Date(str_c(cycle_table - 1, '-01-01')),
    target_cycle = cycle_table,
    line_num = '11AI'
  )

# Master committee lookup: every table built above stacked into one tibble.
# It is joined to the contribution data on (cmte_id, line_num, cycle_table).
cmtes <- bind_rows(
  cmtes12_full, cmtes16_full, cmtes20_full, # per-cycle lists
  other_full,                               # rows carried over from another cycle table
  other_pres,
  other_party,
  congressional
)

# Contribution classes to drop: for these committee / line-number pairs the
# rows with a missing receipt_tp are excluded from the summary.
drop_classes <- tribble(
  ~cmte_id,    ~line_num, ~receipt_tp,
  "C00618371", "11AI",    NA_character_,
  "C00580100", "17A",     NA_character_,
  "C00608489", "11AI",    NA_character_
)

# Summarise FEC hive ------------------------------------------------------

# Total contribution amount per (type, party, target_cycle, cluster) for the
# committees listed in `cmtes`, read from the arrow dataset in data/fec_final.
f_include <- open_dataset('data/fec_final') %>%
  # cmtes is expected to be unique on these keys (otherwise rows would duplicate)
  inner_join(cmtes, by = c('cmte_id', 'line_num', 'cycle_table')) %>%
  # keep rows without a memo code, dated on or after the committee's start_date
  filter(is.na(memo_cd), start_date <= contb_receipt_dt) %>%
  # drop the classes listed in drop_classes (missing receipt_tp for three
  # committee / line-number pairs)
  # NOTE(review): this explicit filter and the anti_join below target the same
  # rows. Presumably the filter is here because NA keys may not match in the
  # arrow join - confirm before removing either one.
  filter(
    ! (cmte_id == 'C00618371' & line_num == '11AI' & is.na(receipt_tp)),
    ! (cmte_id == 'C00580100' & line_num == '17A'  & is.na(receipt_tp)),
    ! (cmte_id == 'C00608489' & line_num == '11AI' & is.na(receipt_tp))
  ) %>%
  anti_join(drop_classes, by = c('cmte_id', 'line_num', 'receipt_tp')) %>%
  group_by(type, party, target_cycle, cluster) %>%
  summarise(total_amt = sum(contb_receipt_amt, na.rm = TRUE)) %>%
  as.data.frame() # materialise the query as a plain data.frame

# convert in place to a data.table keyed on cluster for the join that follows
setDT(f_include, key = 'cluster')

# Map clusters to components, re-aggregate, reshape wide and write the result.

# component lookup, restricted to ids whose first character is 'f' or 'e'
fec_clusters <- open_dataset('data/all_components/') %>%
  filter(str_sub(id, 1, 1) %in% c('f', 'e')) %>%
  as.data.frame()

setDT(fec_clusters, key = 'id')
setnames(fec_clusters, 'id', 'cluster')

# keep every row of f_include; component is NA where the lookup has no match
f_include <- fec_clusters[f_include, on = 'cluster']
# fall back to the cluster id itself when no component was found
f_include[, component := fifelse(is.na(component), cluster, as.character(component))]
f_include[, cluster := NULL]

# several clusters can share a component, so sum again at component level
f_include <- f_include[
  ,
  .(total_amt = sum(total_amt, na.rm = TRUE)),
  by = .(component, type, party, target_cycle)
]
# one row per cycle and component, one column per party_type combination
f_include <- dcast(f_include, target_cycle + component ~ party + type, value.var = 'total_amt')
# combinations with no contributions become 0 rather than NA
setnafill(
  f_include,
  type = 'const',
  fill = 0,
  cols = setdiff(names(f_include), c('target_cycle', 'component'))
)
setnames(f_include, 'target_cycle', 'cycle')

write_dataset(f_include, 'conts_summ/', max_rows_per_file = 1e6)


# Excluded data -----------------------------------------------------------

# Totals for the contribution classes left out of the summary above: same
# committee join and date / memo filters, but keeping only rows with a missing
# receipt_tp that belong to a committee / line-number pair in drop_classes.
# `f` was never defined in this script; read the same arrow dataset the
# include step uses so the section runs on its own.
f_exclude <- open_dataset('data/fec_final') %>%
  inner_join(cmtes, by = c('cmte_id', 'line_num', 'cycle_table')) %>%
  filter(is.na(memo_cd), start_date <= contb_receipt_dt, is.na(receipt_tp)) %>%
  inner_join(drop_classes, by = c('cmte_id', 'line_num')) %>% # inner here, anti in the include step
  group_by(type, party, target_cycle, cluster) %>%
  summarise(total_amt = sum(contb_receipt_amt, na.rm = TRUE)) %>%
  as.data.frame()

setDT(f_exclude)

# excluded amount by party, target cycle and committee type
f_exclude[, sum(total_amt), .(party, target_cycle, type)]

# party target_cycle   type          V1
# 1:      r         2020    jfc 95627170.35
# 2:      r         2020    pcc 87522854.40
# 3:      r         2020     sp    72170.44
# 4:      r         2016    pcc  3299441.12
# 5:      r         2016     sp    28799.00
# 6:      r         2016    jfc   931593.56
